library(tidyverse) #version 1.2.1
library(lubridate) #version 1.7.4
library(gtrendsR) #version 1.4.2
library(reshape2) #version 1.4.3
library(scales) #version 1.0.0
library(doParallel) #1.0.14

# Gather Google Trends data for the Netherlands via the Google Trends API
# (gtrendsR::gtrends, web search only).
#
# Args:
#   keyword: character vector of search term(s), passed straight to gtrends().
#   time:    time-span string passed straight to gtrends(); this script builds
#            them as "<start date> <end date>".
#
# Returns:
#   A tibble with the columns `date` (parsed with lubridate::ymd) and `hits`,
#   taken from the first element of the gtrends() result.
#   NOTE(review): that first element is presumably the interest-over-time
#   table - confirm against the installed gtrendsR version.
gather_trends2 <- function(keyword, time) {
  trends_raw <- gtrends(keyword, geo = "NL", gprop = "web", time = time)[[1]]

  # as_tibble() replaces the deprecated as.tibble() alias.
  trends_raw %>%
    as_tibble() %>%
    dplyr::select(date, hits) %>%
    mutate(date = ymd(date))
}

# Query windows for the 2017 data: 608 windows, each shifted one day further
# than the previous one. The first runs from 2016-05-03 to 2017-01-01.
start_time17 <- ymd("2016-05-03") + seq(from = 0, to = 607, by = 1)
end_time17 <- ymd("2017-01-01") + seq(from = 0, to = 607, by = 1)
# Each window is encoded as "<start> <end>" (paste's default separator).
time_periods17 <- paste(start_time17, end_time17)
head(time_periods17, 1)

# Query windows for the 2016 data: 609 windows built the same way, the first
# running from 2015-05-03 to 2016-01-01.
start_time16 <- ymd("2015-05-03") + seq(from = 0, to = 608, by = 1)
end_time16 <- ymd("2016-01-01") + seq(from = 0, to = 608, by = 1)
time_periods16 <- paste(start_time16, end_time16)

# Keep only the 2016 windows that do not already occur among the 2017 windows.
time_periods16_needed <- time_periods16[is.na(match(time_periods16, time_periods17))]


# Search terms to look up (ten in total).
# Per the original note, terms are limited to five per batch.
search_term <- c(
  "datalek",
  "hacking",
  "festival",
  "files",
  "depressie",
  "griep",
  "hooikoorts",
  "influenza",
  "verkoudheid",
  "terrorist"
)


#Different keyword specifications lead to different results. Be careful. See below:
#the same term is queried alone and together with other terms, always for the
#same window (the first 2017 window, time_periods17[1]).
#Fix: these calls used `time_periods`, an object this script never defines.
gather_trends2(keyword = search_term[1], time = time_periods17[1]) %>%
  print(n = 10)
gather_trends2(keyword = search_term[1:5], time = time_periods17[1]) %>%
  print(n = 10)
gather_trends2(keyword = "griep", time = time_periods17[1]) %>%
  print(n = 10)
gather_trends2(keyword = c("griep", "datalek"), time = time_periods17[1]) %>%
  print(n = 10)


#Use the following chunk to gather the 2016 GT data
#change "period = head(time_periods16_needed, 366)" to look up 2017 data or any other data
#note that if you search for several terms within one day, your IP would be blocked by google
#so spread your task across days
################################## begin: parallel processing ##############################
#wait a random 3 to 5 minutes before starting
Sys.sleep(runif(1, 3, 5) * 60)

#leave one core free, but never ask for fewer than one worker
#(detectCores() can return 1 or NA)
n_workers <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(n_workers)
registerDoParallel(cl = cl)

#start with a fresh log file; the workers append one line per request
writeLines("", con = "log.txt")

#index of the search term to download in this run
i <- 10

tstart <- Sys.time()
#outer loop: the selected search term; inner loop: one request per time window.
#google_data[[1]] is the list of per-window results for search_term[i].
#head() is used instead of [1:366] so a shorter vector is not padded with NA.
#tryCatch(finally = ...) shuts the cluster down even when a request fails.
google_data <- tryCatch(
  foreach(lookup = search_term[i]) %:%
    foreach(period = head(time_periods16_needed, 366),
            .packages = c("tidyverse", "gtrendsR", "lubridate")) %dopar% {

              cat(period, " ", lookup, "\n", file = "log.txt", append = TRUE)

              gather_trends2(keyword = lookup, time = period)
            },
  finally = stopCluster(cl = cl)
)
Sys.time() - tstart
################################## end: parallel processing ################################


#Use the following chunk to pre-process and output your search results for each term
################################ Post-Processing ###########################################
#google_data[[1]] holds one result per time window for search_term[i];
#join them on date so that every window becomes its own column
google_df <- google_data[[1]] %>%
  reduce(full_join, by = "date") %>%
  add_column(keyword = search_term[i]) %>%
  dplyr::select(date, keyword, everything())

#columns 1-2 are date and keyword; every remaining column is one window's hits.
#The new names are derived from the columns actually present (not from
#length(time_periods16_needed)), so the rename still works when a different
#slice of windows was downloaded.
old_names <- colnames(google_df)[-(1:2)]
new_names <- paste0("sample", seq_along(old_names))
colnames(google_df)[-(1:2)] <- new_names

filename <- paste0("GT16_", search_term[i], ".csv")
write.csv(google_df, filename)
############################################################################################
#end
